
library(readxl)
# Load the source workbook. Later steps read its `state`, `year` and
# `proportion` columns.
# NOTE(review): the path points into one user's Desktop folder, so the script
# only runs on a machine that has the workbook at this exact location.
time_to_recovery_proportion <- read_excel(
  path = "~/Desktop/Missouri Law GV/Dataset/time_to_recovery_proportion.xlsx"
)


library(dplyr)
library(car)

# Keep Missouri plus the eight comparison states with the lowest Maha
# (presumably Mahalanobis) distance to Missouri, and drop the 2007 rows.
# Rows whose state or year is NA are dropped as well, because filter() only
# keeps rows for which every condition is TRUE.
data_proportion <- time_to_recovery_proportion %>%
  filter(
    state %in% c(
      "Missouri", "Colorado", "Georgia", "Illinois", "Indiana",
      "Kentucky", "Ohio", "South Carolina", "Texas"
    ),
    year != 2007
  )

# Indicator column: 1 for Missouri rows with year >= 2008, 0 for every other
# row (including any row where the comparison evaluates to NA).
data_proportion <- data_proportion %>%
  mutate(
    MO_2008ge = if_else(state == "Missouri" & year >= 2008, 1, 0, missing = 0)
  )

# Second data frame restricted to rows with year <= 2013. Because 2007 was
# removed earlier, this is the 2006 and 2008-2013 sample, provided the input
# holds no years before 2006.
data_proportion2 <- filter(data_proportion, year <= 2013)

# Convert year and state to factors in both data frames so that lm() treats
# them as categorical terms. This must run after the numeric comparisons on
# `year` above.
data_proportion <- data_proportion %>%
  mutate(year = as.factor(year), state = as.factor(state))

data_proportion2 <- data_proportion2 %>%
  mutate(year = as.factor(year), state = as.factor(state))

### Linear model on the full filtered sample (2006 and 2008-2019 data)
# The intercept is removed (`- 1`); the remaining terms are state, year and
# the Missouri-from-2008 indicator.
fit1 <- lm(
  proportion ~ state + year + MO_2008ge - 1,
  data = data_proportion
)
summary(fit1)
Anova(fit1)

# CR2 cluster-robust covariance matrix of the coefficients, clustered by state.
library(clubSandwich)
vcovCR(fit1, type = "CR2", cluster = data_proportion$state)


### Same linear model on the restricted sample (2006 and 2008-2013 data)
# The intercept is removed (`- 1`); the remaining terms are state, year and
# the Missouri-from-2008 indicator.
fit2 <- lm(
  proportion ~ state + year + MO_2008ge - 1,
  data = data_proportion2
)
summary(fit2)
Anova(fit2)

# CR2 cluster-robust covariance matrix of the coefficients, clustered by state.
vcovCR(fit2, type = "CR2", cluster = data_proportion2$state)





